#############################################################
# Author: Mike Burnham, mlb6496@psu.edu
# Python: 3.11.5
# OS: Windows 10
#
# Notes: This script reproduces the numbers found in the text
# of section 3.2.3: Validation
##############################################################

import pandas as pd
import logging
from sklearn.metrics import matthews_corrcoef as mcc

# Send log records (DEBUG and above) to the section's log file, opening it
# in "a+" mode so earlier runs are kept rather than overwritten.
logging.basicConfig(
    filename="sect3_2_3.log",
    filemode="a+",
    level=logging.DEBUG,
    format="%(asctime)-15s %(levelname)-8s %(message)s",
)

# Load the CSV (path is relative to the working directory) that the rest of
# the script reads its columns from.
df = pd.read_csv('./trump_test_nli.csv')

# Columns of df that are each scored against the 'adjudicated_sup' column.
cols = ['gL', 'author', 'author2', 'wrote', 'wrote2', 'wrote3', 'doc', 'doc2', 'doc3']

# One Matthews correlation coefficient per column, in the same order as cols.
res = [mcc(df['adjudicated_sup'], df[col]) for col in cols]

# Use the built-in round() instead of the scalar's .round() method: round()
# works for both NumPy scalars and built-in floats, whereas .round() raises
# AttributeError if mcc hands back a plain float (e.g. 0.0).
logging.info("Max MCC: %s", round(max(res), 2))
logging.info("Min MCC: %s", round(min(res), 2))
logging.info("Mean MCC: %s", round(sum(res) / len(res), 2))